Library loading¶

In [1]:
import scanpy as sc, anndata as ad, numpy as np, pandas as pd
from scipy import sparse
from anndata import AnnData
import warnings
import socket
from matplotlib import pylab
import sys
import yaml
import os
import matplotlib.pyplot as plt

warnings.filterwarnings('ignore')
In [2]:
import ipynbname
try:
    nb_fname = ipynbname.name()
except:
    nb_fname = "".join(os.path.basename(globals()["__vsc_ipynb_file__"]).split(".")[:-1])
In [3]:
%matplotlib inline
In [4]:
sc.settings.verbosity = 3         # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=50, facecolor='white', dpi_save=500)
pylab.rcParams['figure.figsize'] = (9, 9)
scanpy==1.8.1 anndata==0.7.6 umap==0.4.6 numpy==1.20.2 scipy==1.6.3 pandas==1.2.4 scikit-learn==0.24.2 statsmodels==0.13.1 python-igraph==0.9.8 louvain==0.7.1 pynndescent==0.5.5

Configure paths¶

In [5]:
hostRoot = "-".join(socket.gethostname().split('-')[0:2])


outdir="./outdir"
markerGenes = "./data/resources/marker_genes.tsv"
outdirFigures= "./figures"
In [6]:
adata = sc.read_h5ad(outdir+"/02C_Preprocess_final.h5ad")
In [7]:
adata
Out[7]:
AnnData object with n_obs × n_vars = 18822 × 28371
    obs: 'dataset', 'organoid', 'region', 'type', 'type_region', 'regionContrast', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'is.Stressed'
    var: 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'mean', 'std'
    uns: 'dataset_colors', 'diffmap_evals', 'draw_graph', 'is.Stressed_colors', 'neighbors', 'organoid_colors', 'pca', 'regionContrast_colors', 'region_colors', 'type_colors', 'umap'
    obsm: 'X_diffmap', 'X_draw_graph_fa', 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'

Dataset Plots¶

In [8]:
adata.obs.columns
Out[8]:
Index(['dataset', 'organoid', 'region', 'type', 'type_region',
       'regionContrast', 'n_genes_by_counts', 'log1p_n_genes_by_counts',
       'total_counts', 'log1p_total_counts', 'total_counts_mt',
       'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo',
       'log1p_total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'is.Stressed'],
      dtype='object')

Plot per organoid¶

In [9]:
for group in adata.obs.organoid.unique():
    sc.pl.umap(adata,color="organoid", groups=[group], size = 30, na_color="lightgrey", save=nb_fname+"."+group+".pdf", frameon=False)
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control1.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid1.pdf

Plot per type¶

In [10]:
for group in adata.obs.type.unique():
    sc.pl.umap(adata,color="type", groups=[group], size = 30, na_color="lightgrey", save=nb_fname+"."+group+".pdf", frameon=False)
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid.pdf

Plot per type-region¶

In [11]:
for region in adata.obs.region.unique():
    sc.pl.umap(adata,color="region", groups=[region], size = 30, na_color="lightgrey", save=nb_fname+"."+region+".pdf", frameon=False)
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.piece2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.piece1.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.piece3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.medial.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.distal.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.proximal.pdf

Plot per type-organoid¶

In [12]:
for dataset in adata.obs.dataset.unique():
    sc.pl.umap(adata,color="dataset", groups=[dataset], size = 30, na_color="lightgrey", save=nb_fname+"."+dataset+".pdf", frameon=False)
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control3_piece2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control1_piece1.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control2_piece2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control3_piece1.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control1_piece3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control1_piece2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control2_piece3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control3_piece3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control2_piece1.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid3_medial.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid2_medial.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid3_distal.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid1_distal.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid2_distal.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid1_medial.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid1_proximal.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid2_proximal.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid3_proximal.pdf

Plot per organoid¶

In [13]:
for organoid in adata.obs.organoid.unique():
    sc.pl.umap(adata,color="organoid", groups=[organoid], size = 30, na_color="lightgrey", save=nb_fname+"."+organoid+".pdf", frameon=False)
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control1.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.control2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid3.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid2.pdf
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.polaroid1.pdf

Leiden pre-save¶

In [14]:
sc.tl.leiden(adata, resolution=.3, key_added="leiden0.3")
running Leiden clustering
    finished: found 12 clusters and added
    'leiden0.3', the cluster labels (adata.obs, categorical) (0:00:02)
In [15]:
sc.pl.umap(adata, color=["leiden0.3"], ncols = 2, size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False, save=nb_fname+".complete.pdf")
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.complete.pdf

Annotation¶

In [16]:
leiden3Mapping = {"0":"Encs-1",
"1":"CBC/BRCs-1",
"2":"EXN-1",
"3":"EnCs-2",
"4":"RGCs-1",
"5":"RGCs-2",
"6":"ccRGCs",
"7":"inPCs",
"8":"EXN-2",
"9":"RtCs",
"10":"ER-Cs",
"11":"CBC/BRCs-2"}

adata.obs["leidenAnno"] =  adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()

sc.pl.umap(adata, color=["leiden0.3","leidenAnno"], ncols = 2, size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False, save=nb_fname+".complete.annpo.pdf")
WARNING: saving figure to file figures/umap03_Dimreduct_exploration.complete.annpo.pdf

Marker genes leiden0.3¶

In [17]:
sc.tl.rank_genes_groups(adata, groupby="leidenAnno", method="wilcoxon")
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:44)
In [18]:
pylab.rcParams['figure.figsize'] = (7, 7)

sc.pl.rank_genes_groups(adata)
pylab.rcParams['figure.figsize'] = (9, 9)
In [19]:
sc.pl.rank_genes_groups_dotplot(adata, n_genes=5, values_to_plot='logfoldchanges', min_logfoldchange=3, vmax=7, vmin=-7, cmap='bwr',standard_scale='var',save=nb_fname+".topMarkers.pdf")
WARNING: dendrogram data not found (using key=dendrogram_leidenAnno). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently.
    using 'X_pca' with n_pcs = 50
Storing dendrogram info using `.uns['dendrogram_leidenAnno']`
WARNING: saving figure to file figures/dotplot_03_Dimreduct_exploration.topMarkers.pdf
In [20]:
#Save all genes for each leiden
allMarkers = {}
for i in adata.obs["leidenAnno"].unique():
    allMarkers[i] = sc.get.rank_genes_groups_df(adata, group=i)
    allMarkers[i]["leidenAnno"] = i
    allMarkers[i] = allMarkers[i][(allMarkers[i].logfoldchanges > 3) & ( allMarkers[i].pvals_adj < 0.05)]
    
allMarkers = pd.concat(allMarkers, ignore_index=True)
allMarkers.to_excel(outdir+"/"+nb_fname+".SCwilcoxon.LeidenMarkers.xls")
#allMarkers.to_excel()
In [21]:
# matrixplot style
topRankedDict = { i: sc.get.rank_genes_groups_df(adata, group=i).head(5)["names"].tolist() for i in adata.obs["leidenAnno"].unique()}
sc.pl.matrixplot(adata, topRankedDict, "leidenAnno", dendrogram=True,swap_axes=False,
                 colorbar_title='mean z-score',cmap='RdBu_r', standard_scale="var", save=nb_fname+".topMarkers.matrixStyle.pdf")
WARNING: saving figure to file figures/matrixplot_03_Dimreduct_exploration.topMarkers.matrixStyle.pdf

Correlation¶

In [22]:
sc.pl.pca_variance_ratio(adata)
In [23]:
sc.tl.dendrogram(adata,"leidenAnno", n_pcs=15)
sc.pl.correlation_matrix(adata, 'leidenAnno', figsize=(10,8),cmap="PRGn", show_correlation_numbers=False,save=nb_fname+".correlation.pdf")
    using 'X_pca' with n_pcs = 15
Storing dendrogram info using `.uns['dendrogram_leidenAnno']`
WARNING: saving figure to file figures/correlation_matrix03_Dimreduct_exploration.correlation.pdf

UMAP¶

In [24]:
sc.pl.umap(adata, color=["leiden0.3","leidenAnno"], ncols = 2, size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)

Save to adata file¶

In [25]:
adata.write_h5ad(outdir+"/3_polaroid_quickAnno.h5ad")
In [ ]:

In [ ]: